library(readr)
library(foreign)
library(plyr)
library(tidyverse)
library(psych)
library(labelled)
library(ggplot2)
library(plotly)
mtwins = read.spss(file = 'mtwins_ecv_faces_amy.sav', header = F, to.data.frame=TRUE, use.value.labels = F)
nis = read.spss(file = 'Suarez NIS data 05172021.sav', header = T, to.data.frame=TRUE, use.value.labels = F)
as.data.frame(attr(mtwins$twin_sex, 'value.labels'))
as.data.frame(attr(mtwins$pc_yrace, 'value.labels'))
as.data.frame(attr(mtwins$pc_education, 'value.labels'))
as.data.frame(attr(mtwins$pc_annincome, 'value.labels'))
mtwins[mtwins == 77] = NA
mtwins[mtwins == 88] = NA
mtwins[mtwins == -99] = NA
table(mtwins$twin_sex)
##
## 0 1
## 386 322
demos = select(mtwins, twin_age_yr, pc_yrace, pc_education, pc_annincome)
describe(demos)
apply((demos), 2, table)
## $twin_age_yr
##
## 7 8 9 10 11 12 13 14 15 16 17 18 19
## 6 4 12 38 36 58 62 146 150 96 80 18 2
##
## $pc_yrace
##
## 0 1 2 3 4 5 6
## 538 10 84 4 4 58 10
##
## $pc_education
##
## 1 3 4 5 6 7 8
## 2 8 52 156 122 232 134
##
## $pc_annincome
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12
## 4 6 18 16 18 14 52 66 38 70 46 82 264
mtwins %>%
ggplot(aes(x = twin_age_yr)) +
geom_histogram(bins = 12, fill = 'white', col = 'black') +
labs(title = "Histogram of Twin Age",
x = "Twin Age (in years)", y = "Count") +
theme_classic() + theme(axis.title.x = element_text(size = 12, family = "Times"),
axis.title.y = element_text(size = 12, family = "Times"),
title = element_text(size = 12, face = 'bold', family = 'Times'))
race = data.frame(
group = c("White","Hispanic","Black","Native American/Native Alaskan","Asian/Pacific Islander","Biracial","Other"),
value = c(538, 10, 84, 4, 4, 58, 10)
)
# Create plotly interactive pie chart
fig = plot_ly(race,
labels = ~group,
values = ~value,
type = 'pie',
textfont=list(size=18))
fig = fig %>% layout(title = list(text='Twin Race/Ethnicity', font=list(size=24)),
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE, font=list(size=18)),
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE, font=list(size=18)),
showlegend=TRUE, legend=list(font=list(size=20)))
m = list(l = 50, r = 50, b = 100, t = 100, pad = 4)
fig = fig %>% layout(autosize = F, width = 750, height = 500, margin=m)
fig
Neighborhood disadvantage is defined using geocoding of family addresses to assess the proportion of neighborhood residents living below the poverty line (10.5% was the average at the time of data collection) between 2011 and 2015 in each family’s census tract.
describe(mtwins$pov1115)
# How many families are living in neighborhoods where more than 10.5% of families in the neighborhood are living below the poverty line?
mtwins$belowPovLine <- ifelse(mtwins$pov1115 > .105, '>10.5% living below pov line', '<10.5% living below pov line')
table(mtwins$belowPovLine)
##
## <10.5% living below pov line >10.5% living below pov line
## 254 454
prop.table(table(mtwins$belowPovLine))
##
## <10.5% living below pov line >10.5% living below pov line
## 0.3587571 0.6412429
michigan = read_csv("/Users/gabrielasuarez/Dropbox (University of Michigan)/MiND_Lab/Research_Projects/nhood_social_processes/Analysis/ACS_allCTs_2008-2012_families.csv")
# Rename the pov_2015 variable to pov1115
michigan$pov1115 = michigan$DP03_0119PE
michigan$pov1115 = michigan$pov1115/100
# Create two new dataframes only with the data that I need
michigan_dat = dplyr::select(michigan, pov1115)
mtwins_dat = dplyr::select(mtwins, pov1115)
# Make a new column in each dataframe that will be a variable to identify where they came from later
mtwins_dat$Group = 'MTwiNS Sample'
michigan_dat$Group = 'State of Michigan'
# Combine into new dataframe "nhood_pov"
nhoodPov = rbind(mtwins_dat, michigan_dat)
# Calculate the mean in each group
mu = ddply(nhoodPov, "Group", summarise, grp.mean=mean(pov1115, na.rm=T))
ggplot(nhoodPov, aes(pov1115, fill = Group)) +
geom_density(alpha = 0.4, position = 'identity') +
geom_vline(data=mu, aes(xintercept=grp.mean, color = Group), linetype="dashed", size= 1) +
scale_color_brewer(palette="Set1") +
scale_fill_brewer(palette="Set1") +
labs(title = "American Community Survey (ACS) 5-Year Average",
x = "Neighborhood Poverty (2011-2015)", y = "Density")+
theme_light() + theme(axis.title.x = element_text(size = 12, family = 'Times'),
axis.title.y = element_text(size = 12, family = 'Times'),
title = element_text(size = 12, face = 'bold', family = 'Times'))
palette1 = c("#009966","#56B4E9")
plot = ggplot(nhoodPov, aes(x = pov1115, fill = Group, color = Group)) +
geom_histogram(binwidth = 0.05, alpha = 0.3, position = 'identity') +
geom_vline(data=mu, aes(xintercept=grp.mean, color = Group), linetype="dashed", size=1) +
theme(legend.position="bottom") +
scale_color_manual(values=palette1) +
scale_fill_manual(values=palette1) +
scale_x_continuous(name="Percentage of families whose income is below the poverty level", breaks = seq(0, 1, 0.1)) +
scale_y_continuous(name="Count", breaks = seq(0, 800, 100)) +
coord_cartesian(xlim=c(0, 1), ylim=c(0, 800)) +
#labs(title = "American Community Survey (ACS) 5-Year Average")+
theme_light() + theme(axis.title.x = element_text(size = 12, face = "bold", family="Times"),
axis.title.y = element_text(size = 12, face = "bold", family="Times"),
legend.position = "top",
legend.title = element_text(size = 12, face = "plain", family="Times"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 12, face = "bold", family="Times"))
plot
#ggsave("neighborhood_dist.png", plot=plot, width = 30, height = 20, unit = "cm", dpi=600)
ggplot(nhoodPov, aes(x = pov1115, fill = Group, color = Group))+
geom_histogram(binwidth = 0.05, alpha = 0.6, aes(y = ..density..), position = 'identity')+
geom_vline(data=mu, aes(xintercept=grp.mean, color = Group), linetype="dashed")+
geom_density(alpha=0.1)+
scale_color_brewer(palette="Set1")+
scale_fill_brewer(palette="Set1")+
labs(title = "American Community Survey (ACS) 5-Year Average",
x = "Neighborhood Poverty Rates (2011-2015)", y = "Density/Count")+
theme_light() + theme(axis.title.x = element_text(size = 12, family = 'Times'),
axis.title.y = element_text(size = 12, family = 'Times'),
title = element_text(size = 12, face = 'bold', family = 'Times'))